library(plyr)
library(lme4)
library(rgdal)

horse <- read.csv("horse_replication_data.csv",
	header=T)

## Reproduce the information featured in Table 1: Summary statistics
summary(horse[,c("Interest","Education","professionalism",
	"hhirev","pop","closeness.ep",
	"polarization.ep","enep.ep")])

## Reproduce the information featured in Table 2: Logistic regression model
## Binomial characterisation
## Might take a minute or so

system.time(horse.mod.topic.ep.bin <- lmer(cbind(horse.count,story.count-horse.count)~
	scale(Interest) + scale(Education) + ## citizen level
	scale(professionalism) + ## journalist level
	outlet.type + scale(hhirev) + scale(log(pop)) + ## market level
	scale(closeness.ep) + scale(polarization.ep) + scale(enep.ep) + ## polity level
	weekend + log1p(daystoelection) + ## misc
	(1|topic) + 
	(1|iso3c/outlet),
	data=horse,
	family=binomial(link="logit")))

summary(horse.mod.topic.ep.bin)

## Get PCP and GMP
predprobs <- rep(fitted(horse.mod.topic.ep.bin),times=horse$story.count)
horse$foo<-1:nrow(horse)
y <- dlply(horse,.(foo),function(df) {
	if (df$horse.count==0) {
		retval <- rep(0,times=(df$story.count))
	} else {
		retval<-c(rep(1,times=df$horse.count),rep(0,times=(df$story.count-df$horse.count)))
	}
	retval
})
y <- unlist(y)
preds <- as.numeric(predprobs > 0.5)
pcp <- sum(preds == y) / length(y)

pijt_1<-log(predprobs)
pijt_0<-log(1-predprobs) ## optionally, pijt_0<-log(1-predprob+1e-7)
 
pijt_1<-pijt_1*(y==1)
pijt_0<-pijt_0*(y==0)
# 
my.logLik<-sum(pijt_0,pijt_1,na.rm=T)
 
my.A<-length(y)
 
GMP<-exp(my.logLik/my.A)
GMP


## Reproduce Figure 1: 
## Lengthy data-preparation

my.subset <- horse[,c("Interest","Education",
	"professionalism",
	"outlet.type","hhirev","pop",
	"closeness.ep","polarization.ep","enep.ep",
	"weekend","daystoelection",
	"outlet","iso3c","year")]

my.subset$Interest <- scale(my.subset$Interest)
my.subset$Education <- scale(my.subset$Education)
my.subset$hhirev <- scale(my.subset$hhirev)
my.subset$scalelogpop <- scale(log(my.subset$pop))
my.subset$closeness.ep <- scale(my.subset$closeness.ep)
my.subset$polarization.ep <- scale(my.subset$polarization.ep)
my.subset$enep.ep <- scale(my.subset$enep.ep)
my.subset$weekend <- as.numeric(my.subset$weekend)
my.subset$topic <- NULL

my.subset <- my.subset[my.subset$year== 2009,]
my.subset<-unique(my.subset)


my.subset2 <- ddply(my.subset,.(iso3c,outlet),function(df) {
	numeric.vars <- which(sapply(df,is.numeric))
	character.vars <- which(sapply(df,is.numeric)==FALSE)
	character.vars <- unique(df[,character.vars])
	if (nrow(character.vars)>1) {
		warning(as.character(unlist(character.vars)))
	}
	data.frame(Interest=mean(df$Interest),Education=mean(df$Education),
		professionalism=mean(df$professionalism),
		hhirev=mean(df$hhirev),scalelogpop=mean(df$scalelogpop),
		closeness.ep=mean(df$closeness.ep),polarization.ep=mean(df$polarization.ep),
		enep.ep=mean(df$enep.ep),	weekend=mean(df$weekend),
		daystoelection=mean(df$daystoelection),
		outlet.type=unique(df$outlet.type))
})

my.subset3 <- aggregate(my.subset2,list(iso3c=my.subset2$iso3c,outlet.type=my.subset2$outlet.type),mean)
keep.list<-which(sapply(my.subset3,function(x)!all(is.na(x))))
my.subset3<-my.subset3[,keep.list]
my.subset3$fixedpreds <- NA
fixies <- fixef(horse.mod.topic.ep.bin)

for (i in 1:nrow(my.subset3)) {
	outlet.coef <- fixies[grepl(as.character(my.subset3$outlet.type[i]),names(fixies))]
	if (length(outlet.coef)==0) {
		outlet.coef <- 0
	}
	my.fixef <- ( fixies["(Intercept)"] + 
		my.subset3$Interest[i] * fixies["scale(Interest)"] +
		my.subset3$Education[i] * fixies["scale(Education)"] +
		my.subset3$professionalism[i] * fixies["scale(professionalism)"] +
		my.subset3$hhirev[i] * fixies["scale(hhirev)"] +
		my.subset3$scalelogpop[i] * fixies["scale(log(pop))"] +
		my.subset3$closeness.ep[i] * fixies["scale(closeness.ep)"] +
		my.subset3$polarization.ep[i] * fixies["scale(polarization.ep)"] +
		my.subset3$enep.ep[i] * fixies["scale(enep.ep)"] +
		my.subset3$weekend[i] * fixies["weekendTRUE"] +
		log1p(my.subset3$daystoelection[i]) * fixies["log1p(daystoelection)"] +
		outlet.coef
		)
	if (length(my.fixef)==0) {
		warning(i)
	} else {
		my.subset3$fixedpreds[i] <- my.fixef
	}
}

ctry.rand <- ranef(horse.mod.topic.ep.bin)$iso3c
outlet.rand <-ranef(horse.mod.topic.ep.bin)$`outlet:iso3c`
outlet.rand <- data.frame(outlet=gsub(":.*","",rownames(outlet.rand)),
	value = outlet.rand,
	iso3c=gsub(".*?:","",rownames(outlet.rand)))
names(outlet.rand)<-c("outlet","outlet.value","iso3c")
outlet.rand <- merge(outlet.rand,my.subset2[,c("outlet","outlet.type")],all.x=T,all.y=F)
outlet.rand <- outlet.rand[!is.na(outlet.rand$outlet.type),]
outlet.rand <- aggregate(outlet.rand,list(iso3c=outlet.rand$iso3c,outlet.type=outlet.rand$outlet.type),mean)
keep.list<-which(sapply(outlet.rand,function(x)!all(is.na(x))))
outlet.rand<-outlet.rand[,keep.list]

topic.rand <- ranef(horse.mod.topic.ep.bin)$topic
topic.rand <- data.frame(Topic=rownames(topic.rand),value=topic.rand)
names(topic.rand)<-c("Topic","Topic.value")

my.subset4 <- merge(merge(my.subset3, ctry.rand),outlet.rand)
my.subset5 <- merge(my.subset4, topic.rand,by=character(0),all=T)

expit <- function(x) { exp(x)/(1+exp(x)) }

my.subset5$finalpred <- expit(my.subset5$fixedpreds + my.subset5$outlet.value + my.subset5$Topic.value)

# Data from http://thematicmapping.org/downloads/world_borders.php.
# Direct link: http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip
# Unpack and put the files in a dir 'data'


world.map <- readOGR(dsn="shape", layer="TM_WORLD_BORDERS_SIMPL-0.3")
world.ggmap <- fortify(world.map, region = "ISO3")
my.subset6 <- subset(my.subset5,Topic %in% c("EU elections","Other elections") & outlet.type %in% c("Commercial TV","Quality"))
my.subset6$fillfactor <- cut(my.subset6$finalpred,
	breaks=quantile(my.subset6$finalpred, probs=seq(0,1, by=1/9)), 
	include.lowest=TRUE)

my.subset6$wrap <- paste(as.character(my.subset6$outlet.type),as.character(my.subset6$Topic),sep=" / ")

cut.plot <- ggplot(my.subset6, aes(map_id = iso3c)) +
	geom_map(aes(fill = fillfactor, colour='black'),map =world.ggmap) +
	scale_colour_manual(values=c('black'),guide="none") +
	scale_y_continuous(limits=c(35,70),breaks=NULL) + 
	scale_x_continuous(limits=c(-10,30),breaks=NULL) + 
	facet_wrap(~wrap) + 
	scale_fill_brewer("Predicted probability",type="seq",palette="Oranges") + 
 	theme_bw() + opts(legend.position="bottom", 
		strip.text.x = theme_text(size = 12),
		strip.background = theme_rect(fill = 'white',colour='white'),
		panel.grid.minor = theme_blank() , panel.grid.major=theme_blank(),
		panel.border= theme_blank()) + guides(fill = guide_legend(nrow = 3))


print(cut.plot)
